// Reset the Results window and clear everything held in memory.
cls
clear all

// Work from the project folder so the relative data paths below resolve.
local dir "~\Dropbox\Working Papers\Distinction Effect"
cd "`dir'"

// Load the OLE 2007-2016 file and attach the PILA 2009-2016 file, matching
// one-to-one on every variable whose name starts with "national".
use "Data\Intermediates\OLE_2007_2016.dta", clear
merge 1:1 national* using "Data\Intermediates\Pila_2009_2016.dta"

// Discard rows that exist only in the PILA file; matched rows and rows found
// only in the OLE file are kept.
drop if _merge == 2
drop _merge

//Establishment ID
// For every year 2009-2016: reduce the raw PILA firm identifier to its numeric
// characters (egen's sieve() function comes from the user-written egenmore
// package), then copy it into firm_id_<year> only when the identifier type
// is "NI" or "NIT". Other identifier types leave firm_id_<year> empty.
forvalues year = 2009/2016 {
	tempvar digits
	egen `digits' = sieve(pila_firm_id_`year'), keep(n)
	drop pila_firm_id_`year'
	rename `digits' pila_firm_id_`year'

	gen firm_id_`year' = pila_firm_id_`year' if inlist(pila_firm_type_`year', "NI", "NIT")
}

//Graduation Year: Postgraduate Degree (year_gradsch) and Undergraduate Degree (year_grad)
// Each year variable is built the same way: convert the source variable to a
// string, keep its first four characters, and convert the result back to a number.
foreach spec in "fst_g_gradsch year_gradsch" "fst_grad year_grad" {
	local datevar : word 1 of `spec'
	local yearvar : word 2 of `spec'

	tostring `datevar', gen(`yearvar')
	replace `yearvar' = substr(`yearvar', 1, 4)
	destring `yearvar', replace
}

//Sample Restriction
// Keep a record only if it has an undergraduate graduation year, its
// postgraduate graduation year is strictly later than the undergraduate one,
// and its birthdate is present. A missing year_gradsch passes the comparison
// because Stata orders missing values above every number.
keep if year_grad != . & year_gradsch > year_grad & ole_birthdate != .

//Program
// The first two graduation-time variables are converted to numeric before
// they are compared with fst_grad.
destring ole_gradtime1 ole_gradtime2, replace

// programcode and fieldcode are filled with the same cascade:
//   slot 1 if its graduation time equals fst_grad;
//   otherwise slot 2 if its graduation time equals fst_grad;
//   otherwise slot 3 when ole_gradtime3 contains no comma.
// Records still empty after the cascade are dropped.
foreach spec in "programcode ole_programcode" "fieldcode ole_programfield" {
	local target : word 1 of `spec'
	local stub   : word 2 of `spec'

	gen `target' = `stub'1 if ole_gradtime1 == fst_grad
	replace `target' = `stub'2 if ole_gradtime2 == fst_grad & `target' == ""
	replace `target' = `stub'3 if !regexm(ole_gradtime3, ",") & `target' == ""
	drop if `target' == ""
}

//Program Level
// Take the level recorded in slot 1 whenever slot 1 is the program assigned to
// the record, then give every program the most frequent of those values.
tempvar slot1_level
gen `slot1_level' = ole_programlevel1 if ole_programcode1 == programcode
bysort programcode : egen program_level = mode(`slot1_level')

//Restrict Sample to 4Y Programs
drop if program_level != "6"

// Only the identifiers, graduation years, birthdate, yearly firm IDs and the
// field/program codes are needed from here on.
keep national_id* year_grad year_gradsch ole_birthdate firm_id_???? fieldcode programcode

// Stack the yearly firm_id_<year> columns: one row per person and year.
reshape long firm_id_, i(national_id*) j(year)
rename firm_id_ firm_id

// Keep a person-year only if it has a firm ID, falls in or after the
// undergraduate graduation year, and the firm ID differs from the person's
// own national ID.
keep if firm_id != "" & year >= year_grad & national_id != firm_id

//Keep if there are at least 4 different workers from 2009 to 2016
preserve
	// One row per worker-firm pair, so each worker counts once per firm.
	bysort national* firm_id : keep if _n == 1
	// Number of distinct workers observed at each firm.
	bysort firm_id : gen n = _N
	keep if n >= 4
	// Reduce to a list with one row per qualifying firm.
	bysort firm_id : keep if _n == 1
	keep firm_id
	count
	tempfile Establishments
	save `Establishments'
restore

// Retain only person-years whose firm appears in the qualifying list.
merge m:1 firm_id using `Establishments', keep(match) nogenerate

//Age
// Age in a given year is that year minus the year of birth.
gen age = year - year(ole_birthdate)

//Age Restrictions
// 1) Keep people whose oldest observed age lies between 22 and 62 (inclusive).
bysort national* : egen age_max = max(age)
keep if inrange(age_max, 22, 62)
drop age_max

// 2) Keep only person-years in which the person is older than 18.
keep if age > 18

// 3) Among the remaining rows, keep people whose youngest observed age is at most 55.
bysort national* : egen age_min = min(age)
keep if age_min <= 55
drop age_min

// Head count for every firm x program x field x year cell.
gen byte one = 1
collapse (sum) n = one, by(firm_id programcode fieldcode year)

// Share of a firm's workers in a given year that belongs to each program.
bysort firm_id year : egen N = total(n)
gen share = n/N
drop n N

// One row per firm-program with a share<year> column for each year.
// reshape requires fieldcode to be constant within firm_id-programcode.
reshape wide share, i(firm_id programcode) j(year)
destring programcode fieldcode, replace

sort programcode firm_id
order programcode firm_id fieldcode
save "Data\Finals\FirmProgram.dta", replace

use "Data\Finals\FirmProgram.dta", clear

//Programs Taking Saber Pro
// Build the list of program codes that appear in the Saber Pro file with
// program_level equal to 3 and an exam year of 2009 or earlier.
preserve
	use "Data\Intermediates\SaberPro_2006-2010.dta", clear
	keep if program_level == 3 & exam_year <= 2009
	keep spro_sniescode
	rename spro_sniescode programcode
	// programcode is the only variable left, so this leaves one row per program.
	duplicates drop
	tempfile Programs
	save `Programs', replace
restore

// Keep only firm-program rows whose program is in that list.
merge m:1 programcode using `Programs', keep(match) nogenerate

//Firms in Within-Industry Productivity Sample
// Build the list of firm IDs present in the firm ranking file and keep only
// the firm-program rows whose firm is in that list.
preserve
	use "Data\Finals\FirmIndustryRanking.dta", clear
	keep firm_id
	// merge m:1 aborts unless firm_id uniquely identifies rows in the using
	// file. Only firm_id is kept, so repeated rows are exact duplicates and
	// removing them loses no information.
	duplicates drop firm_id, force
	tempfile Firms
	save `Firms', replace
restore

merge m:1 firm_id using `Firms'
keep if _merge == 3
drop _merge

//Program Networking
// A missing share means no cell was observed for that firm-program in that
// year; treat it as a zero share.
foreach var of varlist share* {
	replace `var' = 0 if `var' == .
}

// Largest yearly share the program ever reaches within the firm.
egen maxshare = rowmax(share????)

//Drop firms that have employed a low share of students across time
// Rank firms within each program by maxshare (ascending). firm_id is added as
// a tie-breaker: without it Stata orders tied observations at random, so which
// of several firms with equal maxshare fall below the cutoff could change from
// one run to the next.
sort programcode maxshare firm_id
by programcode : gen aux1 = _n
by programcode : gen aux2 = _N
// Relative rank in (0, 1]; small values are the firms with the lowest maxshare.
gen firms_outnetwork = aux1/aux2

sum firms_outnetwork, d
// Remove the bottom tenth of firms within each program.
drop if firms_outnetwork < 0.1
drop aux* firms_outnetwork 

// Then remove rows below the 5th percentile of maxshare across all programs.
sum maxshare, d
drop if maxshare <r(p5)

//Within-Field Low (High) Network: Number of Firms where Students Find Employment
// The distinct-firm counts below use built-in egen functions (tag() marks one
// observation per distinct combination, total() sums the marks), so no
// user-written command is needed.

//Firms in the Field
egen byte first_in_field = tag(fieldcode firm_id)
bysort fieldcode : egen field_firms = total(first_in_field)
drop first_in_field

//Firms where Alumni Work
egen byte first_in_program = tag(programcode firm_id)
bysort programcode : egen program_firms = total(first_in_program)
drop first_in_program

// Fraction of the field's firms in which the program's alumni appear.
gen share_firms = program_firms/field_firms
// One row per program.
duplicates drop programcode, force 

keep programcode fieldcode field_firms program_firms share_firms

// High-network programs are those at or above the 75th percentile of the
// share. The missing guard matters because Stata treats missing as larger than
// any number, so an unguarded ">=" would code a missing share as 1.
sum share_firms, d
gen high_network = share_firms >= r(p75) if !missing(share_firms)

keep programcode high_network

save "Data\Finals\ProgramNetwork.dta", replace
